setwd("C:/Users/Daniel de Kadt/Dropbox (Personal)/Projects/SA_cohort_effects/replication")
library(foreign)
library(ebal)
library(Matching)
library(xtable)
library(ggplot2)
library(mediation)

data = read.dta("sasas_ddk12.dta")
head(data)

subset1=data[which(data$force>-2 & data$force<2 & data$race=="african/black" & (data$vote==1|data$vote==0)),]
head(subset1)

pdf(file="density_covs_1yr.pdf")
par(mfrow=c(4,2))
plot(density(subset1$sex[subset1$treat==1]), col="blue", main="Sex", xlab="")
lines(density(subset1$sex[subset1$treat==0]), col="red")
legend(x=0.25, y=1.75, col=c("blue","red"), legend=c("treated", "control"), lty=1,bty="n")
plot(density(na.omit(subset1$employment[subset1$treat==1])), col="blue", main="Employment", xlab="")
lines(density(na.omit(subset1$employment[subset1$treat==0])), col="red")
plot(density(na.omit(subset1$income[subset1$treat==1])), col="blue", main="Income", xlab="")
lines(density(na.omit(subset1$income[subset1$treat==0])), col="red")
plot(density(na.omit(subset1$wealth[subset1$treat==1])), col="blue", main="Wealth", xlab="")
lines(density(na.omit(subset1$wealth[subset1$treat==0])), col="red")
plot(density(na.omit(subset1$married[subset1$treat==1])), col="blue", main="Married", xlab="")
lines(density(na.omit(subset1$married[subset1$treat==0])), col="red")
plot(density(na.omit(subset1$prim_school[subset1$treat==1])), col="blue", main="Primary Schooling", xlab="")
lines(density(na.omit(subset1$prim_school[subset1$treat==0])), col="red")
plot(density(na.omit(subset1$high_school[subset1$treat==1])), col="blue", main="High Schooling", xlab="")
lines(density(na.omit(subset1$high_school[subset1$treat==0])), col="red")
plot(density(na.omit(subset1$health[subset1$treat==1])), col="blue", main="Health", xlab="")
lines(density(na.omit(subset1$health[subset1$treat==0])), col="red")
dev.off()

pval1=c()
pval1[1] = t.test(subset1$sex[subset1$treat==1],subset1$sex[subset1$treat==0])$p.value
pval1[2] = t.test(subset1$employment[subset1$treat==1],subset1$employment[subset1$treat==0])$p.value
pval1[3] = t.test(subset1$income[subset1$treat==1],subset1$income[subset1$treat==0])$p.value
pval1[4] = t.test(subset1$wealth[subset1$treat==1],subset1$wealth[subset1$treat==0])$p.value
pval1[5] = t.test(subset1$married[subset1$treat==1],subset1$married[subset1$treat==0])$p.value
pval1[6] = t.test(subset1$prim_school[subset1$treat==1],subset1$prim_school[subset1$treat==0])$p.value
pval1[7] = t.test(subset1$high_school[subset1$treat==1],subset1$high_school[subset1$treat==0])$p.value
pval1[8] = t.test(subset1$health[subset1$treat==1],subset1$health[subset1$treat==0])$p.value

diff=c()
diff[1] = t.test(subset1$sex[subset1$treat==1],subset1$sex[subset1$treat==0])$estimate[1] - t.test(subset1$sex[subset1$treat==1],subset1$sex[subset1$treat==0])$estimate[2]
diff[2] = t.test(subset1$employment[subset1$treat==1],subset1$employment[subset1$treat==0])$estimate[1] - t.test(subset1$employment[subset1$treat==1],subset1$employment[subset1$treat==0])$estimate[2]
diff[3] = t.test(subset1$income[subset1$treat==1],subset1$income[subset1$treat==0])$estimate[1] - t.test(subset1$employment[subset1$treat==1],subset1$income[subset1$treat==0])$estimate[2]
diff[4] = t.test(subset1$wealth[subset1$treat==1],subset1$wealth[subset1$treat==0])$estimate[1] - t.test(subset1$wealth[subset1$treat==1],subset1$wealth[subset1$treat==0])$estimate[2]
diff[5] = t.test(subset1$married[subset1$treat==1],subset1$married[subset1$treat==0])$estimate[1] - t.test(subset1$married[subset1$treat==1],subset1$married[subset1$treat==0])$estimate[2]
diff[6] = t.test(subset1$prim_school[subset1$treat==1],subset1$prim_school[subset1$treat==0])$estimate[1] - t.test(subset1$prim_school[subset1$treat==1],subset1$prim_school[subset1$treat==0])$estimate[2]
diff[7] = t.test(subset1$high_school[subset1$treat==1],subset1$high_school[subset1$treat==0])$estimate[1] - t.test(subset1$high_school[subset1$treat==1],subset1$high_school[subset1$treat==0])$estimate[2]
diff[8] = t.test(subset1$health[subset1$treat==1],subset1$health[subset1$treat==0])$estimate[1] - t.test(subset1$health[subset1$treat==1],subset1$health[subset1$treat==0])$estimate[2]

ks=c()
ks[1] = ks.test(subset1$sex[subset1$treat==1],subset1$sex[subset1$treat==0])$p.value
ks[2] = ks.test(subset1$employment[subset1$treat==1],subset1$employment[subset1$treat==0])$p.value
ks[3] = ks.test(subset1$income[subset1$treat==1],subset1$income[subset1$treat==0])$p.value
ks[4] = ks.test(subset1$wealth[subset1$treat==1],subset1$wealth[subset1$treat==0])$p.value
ks[5] = ks.test(subset1$married[subset1$treat==1],subset1$married[subset1$treat==0])$p.value
ks[6] = ks.test(subset1$prim_school[subset1$treat==1],subset1$prim_school[subset1$treat==0])$p.value
ks[7] = ks.test(subset1$high_school[subset1$treat==1],subset1$high_school[subset1$treat==0])$p.value
ks[8] = ks.test(subset1$health[subset1$treat==1],subset1$health[subset1$treat==0])$p.value


tab = cbind(c("Sex", "Employment", "Income", "Wealth",
              "Married", "Primary School", "High School", "Health"), round(diff,3), round(pval1,3), round(ks,3))

xtable(tab)

###WHITE####

subset1=data[which(data$force>-2 & data$force<2 & data$race=="white" & (data$vote==1|data$vote==0)),]
head(subset1)

pdf(file="density_covs_1yr_white.pdf")
par(mfrow=c(4,2))
plot(density(subset1$sex[subset1$treat==1]), col="blue", main="Sex", xlab="")
lines(density(subset1$sex[subset1$treat==0]), col="red")
legend(x=0.25, y=1.75, col=c("blue","red"), legend=c("treated", "control"), lty=1,bty="n")
plot(density(na.omit(subset1$employment[subset1$treat==1])), col="blue", main="Employment", xlab="")
lines(density(na.omit(subset1$employment[subset1$treat==0])), col="red")
plot(density(na.omit(subset1$income[subset1$treat==1])), col="blue", main="Income", xlab="")
lines(density(na.omit(subset1$income[subset1$treat==0])), col="red")
plot(density(na.omit(subset1$wealth[subset1$treat==1])), col="blue", main="Wealth", xlab="")
lines(density(na.omit(subset1$wealth[subset1$treat==0])), col="red")
plot(density(na.omit(subset1$married[subset1$treat==1])), col="blue", main="Married", xlab="")
lines(density(na.omit(subset1$married[subset1$treat==0])), col="red")
plot(density(na.omit(subset1$prim_school[subset1$treat==1])), col="blue", main="Primary Schooling", xlab="")
lines(density(na.omit(subset1$prim_school[subset1$treat==0])), col="red")
plot(density(na.omit(subset1$high_school[subset1$treat==1])), col="blue", main="High Schooling", xlab="")
lines(density(na.omit(subset1$high_school[subset1$treat==0])), col="red")
plot(density(na.omit(subset1$health[subset1$treat==1])), col="blue", main="Health", xlab="")
lines(density(na.omit(subset1$health[subset1$treat==0])), col="red")
dev.off()

pval1=c()
pval1[1] = t.test(subset1$sex[subset1$treat==1],subset1$sex[subset1$treat==0])$p.value
pval1[2] = t.test(subset1$employment[subset1$treat==1],subset1$employment[subset1$treat==0])$p.value
pval1[3] = t.test(subset1$income[subset1$treat==1],subset1$income[subset1$treat==0])$p.value
pval1[4] = t.test(subset1$wealth[subset1$treat==1],subset1$wealth[subset1$treat==0])$p.value
pval1[5] = t.test(subset1$married[subset1$treat==1],subset1$married[subset1$treat==0])$p.value
pval1[6] = t.test(subset1$prim_school[subset1$treat==1],subset1$prim_school[subset1$treat==0])$p.value
pval1[7] = t.test(subset1$high_school[subset1$treat==1],subset1$high_school[subset1$treat==0])$p.value
pval1[8] = t.test(subset1$health[subset1$treat==1],subset1$health[subset1$treat==0])$p.value

diff=c()
diff[1] = t.test(subset1$sex[subset1$treat==1],subset1$sex[subset1$treat==0])$estimate[1] - t.test(subset1$sex[subset1$treat==1],subset1$sex[subset1$treat==0])$estimate[2]
diff[2] = t.test(subset1$employment[subset1$treat==1],subset1$employment[subset1$treat==0])$estimate[1] - t.test(subset1$employment[subset1$treat==1],subset1$employment[subset1$treat==0])$estimate[2]
diff[3] = t.test(subset1$income[subset1$treat==1],subset1$income[subset1$treat==0])$estimate[1] - t.test(subset1$employment[subset1$treat==1],subset1$income[subset1$treat==0])$estimate[2]
diff[4] = t.test(subset1$wealth[subset1$treat==1],subset1$wealth[subset1$treat==0])$estimate[1] - t.test(subset1$wealth[subset1$treat==1],subset1$wealth[subset1$treat==0])$estimate[2]
diff[5] = t.test(subset1$married[subset1$treat==1],subset1$married[subset1$treat==0])$estimate[1] - t.test(subset1$married[subset1$treat==1],subset1$married[subset1$treat==0])$estimate[2]
diff[6] = t.test(subset1$prim_school[subset1$treat==1],subset1$prim_school[subset1$treat==0])$estimate[1] - t.test(subset1$prim_school[subset1$treat==1],subset1$prim_school[subset1$treat==0])$estimate[2]
diff[7] = t.test(subset1$high_school[subset1$treat==1],subset1$high_school[subset1$treat==0])$estimate[1] - t.test(subset1$high_school[subset1$treat==1],subset1$high_school[subset1$treat==0])$estimate[2]
diff[8] = t.test(subset1$health[subset1$treat==1],subset1$health[subset1$treat==0])$estimate[1] - t.test(subset1$health[subset1$treat==1],subset1$health[subset1$treat==0])$estimate[2]

ks=c()
ks[1] = ks.test(subset1$sex[subset1$treat==1],subset1$sex[subset1$treat==0])$p.value
ks[2] = ks.test(subset1$employment[subset1$treat==1],subset1$employment[subset1$treat==0])$p.value
ks[3] = ks.test(subset1$income[subset1$treat==1],subset1$income[subset1$treat==0])$p.value
ks[4] = ks.test(subset1$wealth[subset1$treat==1],subset1$wealth[subset1$treat==0])$p.value
ks[5] = ks.test(subset1$married[subset1$treat==1],subset1$married[subset1$treat==0])$p.value
ks[6] = ks.test(subset1$prim_school[subset1$treat==1],subset1$prim_school[subset1$treat==0])$p.value
ks[7] = ks.test(subset1$high_school[subset1$treat==1],subset1$high_school[subset1$treat==0])$p.value
ks[8] = ks.test(subset1$health[subset1$treat==1],subset1$health[subset1$treat==0])$p.value


tab = cbind(c("Sex", "Employment", "Income", "Wealth",
              "Married", "Primary School", "High School", "Health"), round(diff,3), round(pval1,3), round(ks,3))

xtable(tab)

###COLOURED###

subset1=data[which(data$force>-2 & data$force<2 & data$race=="coloured" & (data$vote==1|data$vote==0)),]
head(subset1)

pdf(file="density_covs_1yr_coloured.pdf")
par(mfrow=c(4,2))
plot(density(subset1$sex[subset1$treat==1]), col="blue", main="Sex", xlab="")
lines(density(subset1$sex[subset1$treat==0]), col="red")
legend(x=0.25, y=1.75, col=c("blue","red"), legend=c("treated", "control"), lty=1,bty="n")
plot(density(na.omit(subset1$employment[subset1$treat==1])), col="blue", main="Employment", xlab="")
lines(density(na.omit(subset1$employment[subset1$treat==0])), col="red")
plot(density(na.omit(subset1$income[subset1$treat==1])), col="blue", main="Income", xlab="")
lines(density(na.omit(subset1$income[subset1$treat==0])), col="red")
plot(density(na.omit(subset1$wealth[subset1$treat==1])), col="blue", main="Wealth", xlab="")
lines(density(na.omit(subset1$wealth[subset1$treat==0])), col="red")
plot(density(na.omit(subset1$married[subset1$treat==1])), col="blue", main="Married", xlab="")
lines(density(na.omit(subset1$married[subset1$treat==0])), col="red")
plot(density(na.omit(subset1$prim_school[subset1$treat==1])), col="blue", main="Primary Schooling", xlab="")
lines(density(na.omit(subset1$prim_school[subset1$treat==0])), col="red")
plot(density(na.omit(subset1$high_school[subset1$treat==1])), col="blue", main="High Schooling", xlab="")
lines(density(na.omit(subset1$high_school[subset1$treat==0])), col="red")
plot(density(na.omit(subset1$health[subset1$treat==1])), col="blue", main="Health", xlab="")
lines(density(na.omit(subset1$health[subset1$treat==0])), col="red")
dev.off()

pval1=c()
pval1[1] = t.test(subset1$sex[subset1$treat==1],subset1$sex[subset1$treat==0])$p.value
pval1[2] = t.test(subset1$employment[subset1$treat==1],subset1$employment[subset1$treat==0])$p.value
pval1[3] = t.test(subset1$income[subset1$treat==1],subset1$income[subset1$treat==0])$p.value
pval1[4] = t.test(subset1$wealth[subset1$treat==1],subset1$wealth[subset1$treat==0])$p.value
pval1[5] = t.test(subset1$married[subset1$treat==1],subset1$married[subset1$treat==0])$p.value
pval1[6] = t.test(subset1$prim_school[subset1$treat==1],subset1$prim_school[subset1$treat==0])$p.value
pval1[7] = t.test(subset1$high_school[subset1$treat==1],subset1$high_school[subset1$treat==0])$p.value
pval1[8] = t.test(subset1$health[subset1$treat==1],subset1$health[subset1$treat==0])$p.value

diff=c()
diff[1] = t.test(subset1$sex[subset1$treat==1],subset1$sex[subset1$treat==0])$estimate[1] - t.test(subset1$sex[subset1$treat==1],subset1$sex[subset1$treat==0])$estimate[2]
diff[2] = t.test(subset1$employment[subset1$treat==1],subset1$employment[subset1$treat==0])$estimate[1] - t.test(subset1$employment[subset1$treat==1],subset1$employment[subset1$treat==0])$estimate[2]
diff[3] = t.test(subset1$income[subset1$treat==1],subset1$income[subset1$treat==0])$estimate[1] - t.test(subset1$employment[subset1$treat==1],subset1$income[subset1$treat==0])$estimate[2]
diff[4] = t.test(subset1$wealth[subset1$treat==1],subset1$wealth[subset1$treat==0])$estimate[1] - t.test(subset1$wealth[subset1$treat==1],subset1$wealth[subset1$treat==0])$estimate[2]
diff[5] = t.test(subset1$married[subset1$treat==1],subset1$married[subset1$treat==0])$estimate[1] - t.test(subset1$married[subset1$treat==1],subset1$married[subset1$treat==0])$estimate[2]
diff[6] = t.test(subset1$prim_school[subset1$treat==1],subset1$prim_school[subset1$treat==0])$estimate[1] - t.test(subset1$prim_school[subset1$treat==1],subset1$prim_school[subset1$treat==0])$estimate[2]
diff[7] = t.test(subset1$high_school[subset1$treat==1],subset1$high_school[subset1$treat==0])$estimate[1] - t.test(subset1$high_school[subset1$treat==1],subset1$high_school[subset1$treat==0])$estimate[2]
diff[8] = t.test(subset1$health[subset1$treat==1],subset1$health[subset1$treat==0])$estimate[1] - t.test(subset1$health[subset1$treat==1],subset1$health[subset1$treat==0])$estimate[2]

ks=c()
ks[1] = ks.test(subset1$sex[subset1$treat==1],subset1$sex[subset1$treat==0])$p.value
ks[2] = ks.test(subset1$employment[subset1$treat==1],subset1$employment[subset1$treat==0])$p.value
ks[3] = ks.test(subset1$income[subset1$treat==1],subset1$income[subset1$treat==0])$p.value
ks[4] = ks.test(subset1$wealth[subset1$treat==1],subset1$wealth[subset1$treat==0])$p.value
ks[5] = ks.test(subset1$married[subset1$treat==1],subset1$married[subset1$treat==0])$p.value
ks[6] = ks.test(subset1$prim_school[subset1$treat==1],subset1$prim_school[subset1$treat==0])$p.value
ks[7] = ks.test(subset1$high_school[subset1$treat==1],subset1$high_school[subset1$treat==0])$p.value
ks[8] = ks.test(subset1$health[subset1$treat==1],subset1$health[subset1$treat==0])$p.value


tab = cbind(c("Sex", "Employment", "Income", "Wealth",
              "Married", "Primary School", "High School", "Health"), round(diff,3), round(pval1,3), round(ks,3))

xtable(tab)

